
************************************************
*** Replication files list experiment Russia ***
*** Date of data collection: 13 April 2022   ***
*** Philipp Chapkovski and Max Schaub        ***
************************************************



*** Setup ***

** Working directory
*   Note: Set the working directory to the folder that contains the dataset,
*   then run the code below

*cd "[your directory]"  // uncomment this line

* Load the survey data (looked up in the current working directory)
use "toloka_2022_04_13.dta", clear


** Packages
*   Note: user-written packages from SSC; each line below (re)installs the
*   package on every run of this script
ssc install coefplot, replace // thanks to Ben Jann
ssc install scheme-burd, replace // thanks to François Briatte
ssc install tab3way, replace // thanks to Philip Ryan
ssc install estout, replace // thanks to Ben Jann (once more)
* kict is needed for the design-effect test in the appendix ("kict deff").
* It is installed only when it is not already on the ado-path, so that an
* existing copy (e.g. one obtained from another source) is left untouched.
capture which kict
if _rc {
	ssc install kict, replace // thanks to Chi-lin Tsai
}



*** Coding ***

** Demographics

* Age (copied from playerage)
generate age = playerage
label variable age "Age"

* Age-group indicators; both stay missing when age is missing (.)
generate under40 = cond(age<40, 1, cond(age>=40 & age!=., 0, .))
generate over40 = cond(age<40, 0, cond(age>=40 & age!=., 1, .))
label define over40 0 "under 40" 1 "40 and over", replace
label values over40 over40

* Age rescaled to decades
generate age10 = age/10
label variable age10 "Age in 10y"

* Gender: playergender==0 is coded as male, playergender==1 as female;
* any other value is left missing
generate male = cond(playergender==0, 1, cond(playergender==1, 0, .))
label variable male "Male"
label define male 0 "Female" 1 "Male", replace
label values male male

* Education (five categories) and a some-university indicator (edu above 1)
generate edu = playereducation
label variable edu "Education"
label define edu ///
	0 "High school (including incomplete)" ///
	1 "Secondary vocational education" ///
	2 "Incomplete higher education" ///
	3 "Higher education" ///
	4 "Two or more degrees", replace
label values edu edu
generate someuni = cond(edu>1 & edu!=., 1, cond(edu<=1, 0, .))
label define someuni 0 "no university" 1 "some university", replace
label values someuni someuni

* Employment status
generate employment = playeremployment
label variable employment "Employment status"
label define employment ///
	0 "Employed (in a private company)" ///
	1 "Employed (in a state-owned company or in the civil service)" ///
	2 "Not formally employed", replace
label values employment employment

* TV as major information source: the raw variable is a string in which "NA"
* is recoded to "0" before conversion to numeric
generate tv = cond(playermedia_tv=="NA", "0", playermedia_tv)
destring tv, replace
label variable tv "TV major info source"
label define tv 0 "TV not major info source" 1 "TV major info source", replace
label values tv tv

** Treatment status
* 1 = "treatment", 0 = "baseline", missing otherwise
generate treat = cond(playertreatment=="treatment", 1, cond(playertreatment=="baseline", 0, .))
label variable treat "Treatment (estimate list experiment)"

** Nr of supported items
generate nlist = playermain

** Direct question: personal support for invasion
* Note: directq keeps answers to the direct question for respondents outside
* the treatment condition only (missing when treat==1); directq_all keeps the
* answers of all respondents
generate directq = playerdirect if treat!=1
generate directq_all = playerdirect

** Weights
* Each respondent's weight is the population share of their cell divided by
* the sample share of that cell; cells are age group x gender x university.
* Note: Population data from the Russian census 2010, see links below
* Cell percentages in the population:
*				no university		some university	
* 				female	male		female	male
* 40_and_more	26.35	18.42		7.70	5.78
* under_40		12.08	14.46		8.76	6.44

* Sample shares (cell percentages in the data)
* NOTE(review): the denominators used below are presumably transcribed from
* this table; re-check them if the sample changes
tab3way under40 male someuni, cellpct nofreq

* 40 and over, no university
generate popwgt = 26.35/6.23 if someuni==0 & male==0 & under40==0
replace popwgt = 18.42/8.29 if someuni==0 & male==1 & under40==0
* 40 and over, some university
replace popwgt = 7.70/9.99 if someuni==1 & male==0 & under40==0
replace popwgt = 5.78/10.66 if someuni==1 & male==1 & under40==0
* under 40, no university
replace popwgt = 12.08/11.66 if someuni==0 & male==0 & under40==1
replace popwgt = 14.46/13.82 if someuni==0 & male==1 & under40==1
* under 40, some university
replace popwgt = 8.76/22.95 if someuni==1 & male==0 & under40==1
replace popwgt = 6.44/16.39 if someuni==1 & male==1 & under40==1



			   
*** Analysis ***



*** Tables

** Table 1
*   Note: Census data from 
*   https://www.gks.ru/free_doc/new_site/perepis2010/croc/Documents/Vol3/pub-03-01.xlsx
*   and 
*   https://www.gks.ru/free_doc/new_site/perepis2010/croc/Documents/Vol1/pub-01-06.xlsx

* Sample means and number of observations for the three weighting dimensions
tabstat male over40 someuni, statistics(mean n)


*** Significance of difference between direct question and list experiment
*   Note: here we need to compare a single distribution (the direct question)
*   with a difference between two distributions (the list experiment). To do
*   so, we use a manual t-test, which requires the SD of that difference as
*   input.

* Step 1: SD of the difference
* Note: the formula is VarDiff = VarSampleA + VarSampleB
summarize nlist if treat==0, detail
local v_control = r(Var)
display `v_control'
summarize nlist if treat==1, detail
local v_treated = r(Var)
display `v_treated'
local sd_list = sqrt(`v_control' + `v_treated')
display `sd_list'

* Step 2: number of observations and mean for the list experiment
*   obs  = rounded average of the two group sizes
*   mean = r(mu_2) - r(mu_1) from the two-group t-test on nlist by treat
* NOTE(review): using the average group size is an approximation of the
* standard error of a difference in means; it is exact only for equal group
* sizes
ttest nlist, by(treat)
local n_list = round((r(N_1) + r(N_2))/2)
display `n_list'
local est_list = r(mu_2) - r(mu_1)
display `est_list'

* Step 3: observations, mean and SD of the direct question
summarize directq
local n_dq = r(N)
local mean_dq = r(mean)
local sd_dq = r(sd)

* Step 4: t-test via the direct input method, syntax is
*   ttesti #obs1 #mean1 #sd1 #obs2 #mean2 #sd2 [, options2]
* NOTE(review): no options are passed, so ttesti runs with its defaults;
* confirm that this is intended given the different SDs of the two inputs
ttesti `n_list' `est_list' `sd_list' `n_dq' `mean_dq' `sd_dq'



*** Figures


** Figure 2
* Two panels with identical layout: unweighted first, then weighted by popwgt.
* Each panel plots the list-experiment estimate (marginal effect of treat on
* nlist) next to the direct-question estimate (constant of a regression of
* directq on nothing). The stored estimates qexperiment and qdirect are
* overwritten in the second pass.

foreach spec in unweighted weighted {

	* Panel-specific settings: weight expression, graph name, panel title
	if "`spec'" == "unweighted" {
		local wgt ""
		local gname maineffects
		local gtitle "Unweighted"
	}
	else {
		local wgt "[aweight=popwgt]"
		local gname maineffects_weighted
		local gtitle "Weighted"
	}

	regress nlist treat `wgt'
	eststo qexperiment: margins, dydx(treat) post coeflegend
	eststo qdirect: regress directq `wgt'

	coefplot qexperiment qdirect, ///
		name(`gname', replace) ///
		title("", margin(medlarge)) legend(off) ///
		vertical recast(bar) barwidth(0.75) fcolor(*1.5) ///
		ciopts(recast(rcap) lwidth(medthick)) citop ///
		format(%9.0f) scheme(burd8) yline(0, lpattern(solid)) ///
		coeflabels(treat = "List experiment" _cons = "Direct question") ///
		ytitle("Support for invasion", size(medlarge)) xtitle("") ///
		title("`gtitle'", size(medium)) ///
		yscale(range(0.4 (.1) .8)) ylab(0.4 (.1) .8, format(%9.1f))
	graph display , ysize(1) xsize(1) scale(1)
}

* Put both panels side by side and save the combined figure
graph combine maineffects maineffects_weighted, col(2)
graph display , ysize(1) xsize(1.9) scale(1.3)
graph export "maineffects_combined.png", replace



** Figure 3b
* Support for the invasion by TV use (tv = 0/1), weighted by popwgt

* Separate regressions within each tv group; results are only displayed,
* not stored
bysort tv: regress nlist treat [aweight=popwgt]
bysort tv: regress directq [aweight=popwgt]

* List experiment: marginal effect of treat within each tv group, followed by
* a test of equality of the two group-specific effects
regress nlist c.treat##i.tv [aweight=popwgt]
eststo qexptv: margins, dydx(treat) by(tv) post coeflegend
test _b[treat:0bn.tv] = _b[treat:1.tv]

* Direct question: predicted margin for each tv group
regress directq i.tv [aweight=popwgt]
eststo qdirtv: margins tv, post coeflegend

coefplot (qexptv, label("List experiment")) (qdirtv, label("Direct question")), ///
	name(condeffects_tv, replace) ///
	title("", margin(medlarge)) ///
	vertical recast(bar) barwidth(0.28) fcolor(*1.5) ///
	ciopts(recast(rcap) lwidth(medium)) citop ///
	format(%9.0f) scheme(burd8) yline(0, lpattern(solid)) ///
	ytitle("Support for invasion", size(medium)) xtitle("") ///
	yscale(range(0.4 (.1) .9)) ylab(0.4 (.1) .9, format(%9.1f))
graph display , ysize(1) xsize(1) scale(1)
graph export "condeffects_tv.png", replace




*** Appendix ***



** Regression table
* Four OLS models of nlist on treat: with/without covariates, and
* with/without population weights. Written to maintab.rtf.

eststo clear

local covars male age10 edu
local wgt "[aweight=popwgt]"

eststo ulist: regress nlist treat
eststo culist: regress nlist treat `covars'
eststo wlist: regress nlist treat `wgt'
eststo cwlist: regress nlist treat `covars' `wgt'

* NOTE(review): _cons is labelled "estimate direct question" although the
* dependent variable in all four models is nlist; the label text is kept as
* is, confirm it against the published table
esttab ulist culist wlist cwlist using "maintab.rtf", ///
	replace se b(%9.2f) sfmt(%9.2gc) ///
	stats(N r2, fmt(%9.0gc %9.2f)) ///
	lab wrap varwidth(45) compress nogaps ///
	coeflabels(_cons "Constant (estimate direct question)")

		

** Testing for design effects
* Requires the user-written kict package (by Chi-lin Tsai); treat identifies
* the list-experiment condition and nnonkey(3) is the number of non-key items
* passed to the command

kict deff nlist, condition(treat) nnonkey(3) test gms



** Balance stats
* Means and SDs of the covariates, overall and by treatment group, followed
* by t-tests of the group differences
eststo clear

local balvars male age edu employment tv

eststo overall: estpost tabstat `balvars', columns(statistics) stats(mean sd) missing
eststo bygroup: estpost tabstat `balvars', by(treat) columns(statistics) stats(mean sd) missing nototal
esttab overall bygroup, ///
	main(mean %9.2f) aux(sd %9.2f) ///
	unstack wide nonote noobs nonumber label ///
	compress nogaps varwidth(15) replace

* Differences between groups (b) and their p-values (p)
eststo pvalues: quietly estpost ttest `balvars', by(treat)
esttab pvalues, main(b %9.2f) aux(p %9.2f) wide // p-values


* see R script for additional analyses











